#  File src/library/stats/R/kmeans.R
#  Part of the R package, https://www.R-project.org
#
#  Copyright (C) 1995-2017 The R Core Team
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  https://www.R-project.org/Licenses/

kmeans <-
function(x, centers, iter.max = 10L, nstart = 1L,
	 algorithm = c("Hartigan-Wong", "Lloyd", "Forgy", "MacQueen"),
         trace = FALSE)
{
    .Mimax <- .Machine$integer.max
    do_one <- function(nmeth) {
        switch(nmeth,
           {                            # 1 : Hartigan-Wong
	       isteps.Qtran <- as.integer(min(.Mimax, 50 * m))
	       iTran <- c(isteps.Qtran, integer(k)) # correct for k=0
               Z <- .Fortran(C_kmns, x, m, p,
                             centers = centers,
                             as.integer(k), c1 = integer(m), c2 = integer(m),
                             nc = integer(k), double(k), double(k), ncp=integer(k),
                             D = double(m), iTran = iTran, live = integer(k),
                             iter = iter.max, wss = double(k),
                             ifault = as.integer(trace))
               switch(Z$ifault,
                      ## 1:
                      stop("empty cluster: try a better set of initial centers",
                           call. = FALSE),
                      ## 2:
                          Z$iter <- max(Z$iter, iter.max+1L), # -> and warn below
                      ## 3:
                      stop("number of cluster centres must lie between 1 and nrow(x)",
                           call.=FALSE),
                      ## 4: {new @ 2013-06-30; maybe better fix (in Fortran) ?}
                      warning(gettextf("Quick-TRANSfer stage steps exceeded maximum (= %d)",
                                       isteps.Qtran),
                              call.=FALSE)
                      )
           },
           {                            # 2 : Lloyd-Forgy
               Z <- .C(C_kmeans_Lloyd, x, m, p,
                       centers = centers, k,
                       c1 = integer(m), iter = iter.max,
                       nc = integer(k), wss = double(k))
           },
           {                            # 3 : MacQueen
               Z <- .C(C_kmeans_MacQueen, x, m, p,
                       centers = as.double(centers), k,
                       c1 = integer(m), iter = iter.max,
                       nc = integer(k), wss = double(k))
           })

	if(m23 <- any(nmeth == c(2L, 3L))) {
	    if(any(Z$nc == 0))
		warning("empty cluster: try a better set of initial centers",
			call. = FALSE)
	}
	if(Z$iter > iter.max) {
	    warning(sprintf(ngettext(iter.max,
				     "did not converge in %d iteration",
				     "did not converge in %d iterations"),
			    iter.max), call. = FALSE, domain = NA)
	    if(m23) Z$ifault <- 2L
	}
        if(nmeth %in% c(2L, 3L)) {
            if(any(Z$nc == 0))
                warning("empty cluster: try a better set of initial centers",
                        call. = FALSE)
        }
	Z
    }
    x <- as.matrix(x)
    ## as.integer(<too large>) gives NA ==> not allowing too large nrow() / ncol():
    m <- as.integer(nrow(x)); if(is.na(m)) stop("invalid nrow(x)")
    p <- as.integer(ncol(x)); if(is.na(p)) stop("invalid ncol(x)")
    if(missing(centers))
	stop("'centers' must be a number or a matrix")
    nmeth <- switch(match.arg(algorithm),
                    "Hartigan-Wong" = 1L,
                    "Lloyd" = 2L, "Forgy" = 2L,
                    "MacQueen" = 3L)
    storage.mode(x) <- "double"
    if(length(centers) == 1L) {
	k <- centers
        ## we need to avoid duplicates here
        if(nstart == 1L)
            centers <- x[sample.int(m, k), , drop = FALSE]
        if(nstart >= 2L || any(duplicated(centers))) {
            cn <- unique(x)
            mm <- nrow(cn)
            if(mm < k)
                stop("more cluster centers than distinct data points.")
            centers <- cn[sample.int(mm, k), , drop=FALSE]
        }
    } else {
	centers <- as.matrix(centers)
        if(any(duplicated(centers)))
            stop("initial centers are not distinct")
        cn <- NULL
	k <- nrow(centers)
        if(m < k)
            stop("more cluster centers than data points")
    }
    k <- as.integer(k)
    if(is.na(k)) stop(gettextf("invalid value of %s", "'k'"), domain = NA)
    if (k == 1L) nmeth <- 3L # Hartigan-Wong, (Fortran) needs k > 1
    iter.max <- as.integer(iter.max)
    if(is.na(iter.max) || iter.max < 1L) stop("'iter.max' must be positive")
    if(ncol(x) != ncol(centers))
	stop("must have same number of columns in 'x' and 'centers'")
    storage.mode(centers) <- "double"
    Z <- do_one(nmeth)
    best <- sum(Z$wss)
    if(nstart >= 2L && !is.null(cn))
	for(i in 2:nstart) {
	    centers <- cn[sample.int(mm, k), , drop=FALSE]
	    ZZ <- do_one(nmeth)
	    if((z <- sum(ZZ$wss)) < best) {
		Z <- ZZ
		best <- z
	    }
	}
    centers <- matrix(Z$centers, k)
    dimnames(centers) <- list(1L:k, dimnames(x)[[2L]])
    cluster <- Z$c1
    if(!is.null(rn <- rownames(x)))
        names(cluster) <- rn
    totss <- sum(scale(x, scale = FALSE)^2)
    structure(list(cluster = cluster, centers = centers, totss = totss,
		   withinss = Z$wss, tot.withinss = best,
		   betweenss = totss - best, size = Z$nc,
		   iter = Z$iter, ifault = Z$ifault),
	      class = "kmeans")
}

## modelled on print methods in the cluster package
print.kmeans <- function(x, ...)
{
    cat("K-means clustering with ", length(x$size), " clusters of sizes ",
        paste(x$size, collapse = ", "), "\n", sep = "")
    cat("\nCluster means:\n")
    print(x$centers, ...)
    cat("\nClustering vector:\n")
    print(x$cluster, ...)
    cat("\nWithin cluster sum of squares by cluster:\n")
    print(x$withinss, ...)
    ratio <- sprintf(" (between_SS / total_SS = %5.1f %%)\n",
                     100 * x$betweenss/x$totss)
    cat(sub(".", getOption("OutDec"), ratio, fixed = TRUE),
	"Available components:\n", sep = "\n")
    print(names(x))
    if(!is.null(x$ifault) && x$ifault == 2L)
	cat("Warning: did *not* converge in specified number of iterations\n")
    invisible(x)
}

fitted.kmeans <- function(object, method = c("centers", "classes"), ...)
{
	method <- match.arg(method)
	if (method == "centers") object$centers[object$cl, , drop = FALSE]
	else object$cl
}

